notebook.community

Edit and run



In [4]:

    
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

Figure 2.1



In [5]:

    
data = pd.read_csv('data/Advertising.csv')
data.head()



In [6]:

    
plt.figure(figsize=(18, 6))
plt.subplot(131)
sns.regplot(x='TV', y='Sales', data=data, scatter_kws={'color': 'red'})
plt.xlim(-5, 310)
plt.subplot(132)
sns.regplot(x='Radio', y='Sales', data=data, scatter_kws={'color': 'red'})
plt.xlim(-5, 55)
plt.subplot(133)
sns.regplot(x='Newspaper', y='Sales', data=data, scatter_kws={'color': 'red'})
plt.xlim(-5, 110)









    Out[6]:





(-5, 110)

Subsquent figures use data that is not available

Exercise 2.8

8a



In [7]:

    
college = pd.read_csv('data/College.csv')
college.head()









    Out[7]:






  
    
      
      Unnamed: 0
      Private
      Apps
      Accept
      Enroll
      Top10perc
      Top25perc
      F.Undergrad
      P.Undergrad
      Outstate
      Room.Board
      Books
      Personal
      PhD
      Terminal
      S.F.Ratio
      perc.alumni
      Expend
      Grad.Rate
    
  
  
    
      0
      Abilene Christian University
      Yes
      1660
      1232
      721
      23
      52
      2885
      537
      7440
      3300
      450
      2200
      70
      78
      18.1
      12
      7041
      60
    
    
      1
      Adelphi University
      Yes
      2186
      1924
      512
      16
      29
      2683
      1227
      12280
      6450
      750
      1500
      29
      30
      12.2
      16
      10527
      56
    
    
      2
      Adrian College
      Yes
      1428
      1097
      336
      22
      50
      1036
      99
      11250
      3750
      400
      1165
      53
      66
      12.9
      30
      8735
      54
    
    
      3
      Agnes Scott College
      Yes
      417
      349
      137
      60
      89
      510
      63
      12960
      5450
      450
      875
      92
      97
      7.7
      37
      19016
      59
    
    
      4
      Alaska Pacific University
      Yes
      193
      146
      55
      16
      44
      249
      869
      7560
      4120
      800
      1500
      76
      72
      11.9
      2
      10922
      15

8b



In [8]:

    
college.rename(columns={'Unnamed: 0': 'Name'}, inplace=True)
college.head(2)









    Out[8]:






  
    
      
      Name
      Private
      Apps
      Accept
      Enroll
      Top10perc
      Top25perc
      F.Undergrad
      P.Undergrad
      Outstate
      Room.Board
      Books
      Personal
      PhD
      Terminal
      S.F.Ratio
      perc.alumni
      Expend
      Grad.Rate
    
  
  
    
      0
      Abilene Christian University
      Yes
      1660
      1232
      721
      23
      52
      2885
      537
      7440
      3300
      450
      2200
      70
      78
      18.1
      12
      7041
      60
    
    
      1
      Adelphi University
      Yes
      2186
      1924
      512
      16
      29
      2683
      1227
      12280
      6450
      750
      1500
      29
      30
      12.2
      16
      10527
      56

8c i



In [9]:

    
college.describe()









    Out[9]:






  
    
      
      Apps
      Accept
      Enroll
      Top10perc
      Top25perc
      F.Undergrad
      P.Undergrad
      Outstate
      Room.Board
      Books
      Personal
      PhD
      Terminal
      S.F.Ratio
      perc.alumni
      Expend
      Grad.Rate
    
  
  
    
      count
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.000000
      777.00000
    
    
      mean
      3001.638353
      2018.804376
      779.972973
      27.558559
      55.796654
      3699.907336
      855.298584
      10440.669241
      4357.526384
      549.380952
      1340.642214
      72.660232
      79.702703
      14.089704
      22.743887
      9660.171171
      65.46332
    
    
      std
      3870.201484
      2451.113971
      929.176190
      17.640364
      19.804778
      4850.420531
      1522.431887
      4023.016484
      1096.696416
      165.105360
      677.071454
      16.328155
      14.722359
      3.958349
      12.391801
      5221.768440
      17.17771
    
    
      min
      81.000000
      72.000000
      35.000000
      1.000000
      9.000000
      139.000000
      1.000000
      2340.000000
      1780.000000
      96.000000
      250.000000
      8.000000
      24.000000
      2.500000
      0.000000
      3186.000000
      10.00000
    
    
      25%
      776.000000
      604.000000
      242.000000
      15.000000
      41.000000
      992.000000
      95.000000
      7320.000000
      3597.000000
      470.000000
      850.000000
      62.000000
      71.000000
      11.500000
      13.000000
      6751.000000
      53.00000
    
    
      50%
      1558.000000
      1110.000000
      434.000000
      23.000000
      54.000000
      1707.000000
      353.000000
      9990.000000
      4200.000000
      500.000000
      1200.000000
      75.000000
      82.000000
      13.600000
      21.000000
      8377.000000
      65.00000
    
    
      75%
      3624.000000
      2424.000000
      902.000000
      35.000000
      69.000000
      4005.000000
      967.000000
      12925.000000
      5050.000000
      600.000000
      1700.000000
      85.000000
      92.000000
      16.500000
      31.000000
      10830.000000
      78.00000
    
    
      max
      48094.000000
      26330.000000
      6392.000000
      96.000000
      100.000000
      31643.000000
      21836.000000
      21700.000000
      8124.000000
      2340.000000
      6800.000000
      103.000000
      100.000000
      39.800000
      64.000000
      56233.000000
      118.00000

8c ii



In [10]:

    
sns.pairplot(college.iloc[:, 2:11])









    Out[10]:





<seaborn.axisgrid.PairGrid at 0x8a5f828>

8c iii



In [11]:

    
sns.boxplot(x='Private', y='Outstate', data=college)









    Out[11]:





<matplotlib.axes._subplots.AxesSubplot at 0xd6bcfd0>

8c iv



In [12]:

    
college.loc[college.loc[:, 'Top10perc'] > 50, 'Elite'] = 'Yes'
college.loc[college.loc[:, 'Top10perc'] <= 50, 'Elite'] = 'No'
print(college.Elite.value_counts())
sns.boxplot(x='Elite', y='Outstate', data=college)









    



No     699
Yes     78
dtype: int64






    Out[12]:





<matplotlib.axes._subplots.AxesSubplot at 0xe1d8860>

8c v



In [13]:

    
plt.figure(figsize=(10,10))
plt.subplot(221)
sns.distplot(college['Apps'], kde=False, bins=20)
plt.subplot(222)
sns.distplot(college['Apps'], kde=False, bins=100)
plt.subplot(223)
sns.distplot(college['Outstate'], kde=False, bins=20)
plt.subplot(224)
sns.distplot(college['Outstate'], kde=False, bins=100)









    Out[13]:





<matplotlib.axes._subplots.AxesSubplot at 0x108bbda0>

Exercise 2.9

9a



In [14]:

    
auto = pd.read_csv('data/Auto.csv')
auto.head()









    Out[14]:






  
    
      
      mpg
      cylinders
      displacement
      horsepower
      weight
      acceleration
      year
      origin
      name
    
  
  
    
      0
      18
      8
      307
      130
      3504
      12.0
      70
      1
      chevrolet chevelle malibu
    
    
      1
      15
      8
      350
      165
      3693
      11.5
      70
      1
      buick skylark 320
    
    
      2
      18
      8
      318
      150
      3436
      11.0
      70
      1
      plymouth satellite
    
    
      3
      16
      8
      304
      150
      3433
      12.0
      70
      1
      amc rebel sst
    
    
      4
      17
      8
      302
      140
      3449
      10.5
      70
      1
      ford torino

9b and 9c



In [15]:

    
auto.describe()









    Out[15]:






  
    
      
      mpg
      cylinders
      displacement
      weight
      acceleration
      year
      origin
    
  
  
    
      count
      397.000000
      397.000000
      397.000000
      397.000000
      397.000000
      397.000000
      397.000000
    
    
      mean
      23.515869
      5.458438
      193.532746
      2970.261965
      15.555668
      75.994962
      1.574307
    
    
      std
      7.825804
      1.701577
      104.379583
      847.904119
      2.749995
      3.690005
      0.802549
    
    
      min
      9.000000
      3.000000
      68.000000
      1613.000000
      8.000000
      70.000000
      1.000000
    
    
      25%
      17.500000
      4.000000
      104.000000
      2223.000000
      13.800000
      73.000000
      1.000000
    
    
      50%
      23.000000
      4.000000
      146.000000
      2800.000000
      15.500000
      76.000000
      1.000000
    
    
      75%
      29.000000
      8.000000
      262.000000
      3609.000000
      17.100000
      79.000000
      2.000000
    
    
      max
      46.600000
      8.000000
      455.000000
      5140.000000
      24.800000
      82.000000
      3.000000

9d



In [17]:

    
ss1 = auto[:10]
ss2 = auto[85:]
subset = pd.concat([ss1, ss2])
subset.describe()









    Out[17]:






  
    
      
      mpg
      cylinders
      displacement
      weight
      acceleration
      year
      origin
    
  
  
    
      count
      322.000000
      322.000000
      322.000000
      322.000000
      322.000000
      322.000000
      322.000000
    
    
      mean
      24.409317
      5.378882
      187.680124
      2936.807453
      15.700621
      77.130435
      1.596273
    
    
      std
      7.913357
      1.657398
      100.120925
      810.987533
      2.706436
      3.131849
      0.815572
    
    
      min
      11.000000
      3.000000
      68.000000
      1649.000000
      8.500000
      70.000000
      1.000000
    
    
      25%
      18.000000
      4.000000
      100.250000
      2216.000000
      14.000000
      75.000000
      1.000000
    
    
      50%
      23.900000
      4.000000
      145.500000
      2797.500000
      15.500000
      77.000000
      1.000000
    
    
      75%
      30.650000
      6.000000
      250.000000
      3516.000000
      17.275000
      80.000000
      2.000000
    
    
      max
      46.600000
      8.000000
      455.000000
      4997.000000
      24.800000
      82.000000
      3.000000

9e



In [18]:

    
sns.pairplot(auto.iloc[:, :8])









    Out[18]:





<seaborn.axisgrid.PairGrid at 0x107c0f60>

9f



In [22]:

    
plt.figure(figsize=(12,6))
plt.subplot(121)
sns.boxplot(x='cylinders', y='mpg', data=auto)
plt.subplot(122)
sns.regplot(x='weight', y='mpg', data=auto, scatter_kws={'color': 'red'})









    Out[22]:





<matplotlib.axes._subplots.AxesSubplot at 0x150f86d8>

Exercise 2.10

10a and d



In [24]:

    
boston = pd.read_csv('data/Boston.csv')
boston.describe()

10b and c



In [25]:

    
sns.pairplot(boston[1:])









    Out[25]:





<seaborn.axisgrid.PairGrid at 0x152dc2b0>

10 e



In [27]:

    
boston.chas.sum() # value is 1 if next to Charles otherwise value is zero so sum is number next to Charles









    Out[27]:





35

10 f



In [28]:

    
boston.ptratio.median()









    Out[28]:





19.05

10 g



In [30]:

    
boston[boston.medv == boston.medv.min()]

10 h



In [31]:

    
boston[boston.rm >= 7].describe()



In [32]:

    
boston[boston.rm >= 8].describe()



In [ ]:

	Unnamed: 0	TV	Radio	Newspaper	Sales
0	1	230.1	37.8	69.2	22.1
1	2	44.5	39.3	45.1	10.4
2	3	17.2	45.9	69.3	9.3
3	4	151.5	41.3	58.5	18.5
4	5	180.8	10.8	58.4	12.9

	Unnamed: 0	crim	zn	indus	chas	nox	rm	age	dis	rad	tax	ptratio	black	lstat	medv
count	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000
mean	253.500000	3.613524	11.363636	11.136779	0.069170	0.554695	6.284634	68.574901	3.795043	9.549407	408.237154	18.455534	356.674032	12.653063	22.532806
std	146.213884	8.601545	23.322453	6.860353	0.253994	0.115878	0.702617	28.148861	2.105710	8.707259	168.537116	2.164946	91.294864	7.141062	9.197104
min	1.000000	0.006320	0.000000	0.460000	0.000000	0.385000	3.561000	2.900000	1.129600	1.000000	187.000000	12.600000	0.320000	1.730000	5.000000
25%	127.250000	0.082045	0.000000	5.190000	0.000000	0.449000	5.885500	45.025000	2.100175	4.000000	279.000000	17.400000	375.377500	6.950000	17.025000
50%	253.500000	0.256510	0.000000	9.690000	0.000000	0.538000	6.208500	77.500000	3.207450	5.000000	330.000000	19.050000	391.440000	11.360000	21.200000
75%	379.750000	3.677082	12.500000	18.100000	0.000000	0.624000	6.623500	94.075000	5.188425	24.000000	666.000000	20.200000	396.225000	16.955000	25.000000
max	506.000000	88.976200	100.000000	27.740000	1.000000	0.871000	8.780000	100.000000	12.126500	24.000000	711.000000	22.000000	396.900000	37.970000	50.000000

	Unnamed: 0	crim	zn	indus	chas	nox	rm	age	dis	rad	tax	ptratio	black	lstat	medv
398	399	38.3518	0	18.1	0	0.693	5.453	100	1.4896	24	666	20.2	396.90	30.59	5
405	406	67.9208	0	18.1	0	0.693	5.683	100	1.4254	24	666	20.2	384.97	22.98	5

	Unnamed: 0	crim	zn	indus	chas	nox	rm	age	dis	rad	tax	ptratio	black	lstat	medv
count	64.000000	64.000000	64.000000	64.000000	64.000000	64.000000	64.000000	64.000000	64.000000	64.000000	64.000000	64.000000	64.000000	64.000000	64.000000
mean	224.015625	0.979109	28.171875	5.775625	0.125000	0.504455	7.570094	60.640625	4.199617	5.984375	312.234375	16.259375	388.275156	5.474062	38.396875
std	93.675308	2.807599	34.053089	5.544494	0.333333	0.092863	0.481467	27.858587	2.074423	5.655429	118.311365	2.351407	9.487005	2.906582	8.722639
min	3.000000	0.009060	0.000000	0.460000	0.000000	0.394000	7.007000	8.400000	1.202400	1.000000	193.000000	12.600000	354.310000	1.730000	15.000000
25%	186.000000	0.045023	0.000000	2.460000	0.000000	0.430250	7.183250	36.000000	2.444925	3.000000	244.750000	14.700000	384.922500	3.555000	32.975000
50%	230.500000	0.097860	20.000000	3.970000	0.000000	0.488000	7.414000	63.800000	3.495200	5.000000	273.000000	17.400000	390.660000	4.775000	36.450000
75%	270.250000	0.542893	45.000000	6.200000	0.000000	0.582500	7.858500	85.025000	5.462925	7.000000	329.000000	17.925000	395.305000	6.590000	46.175000
max	483.000000	19.609100	95.000000	19.580000	1.000000	0.718000	8.780000	100.000000	9.222900	24.000000	666.000000	20.200000	396.900000	16.740000	50.000000

	Unnamed: 0	crim	zn	indus	chas	nox	rm	age	dis	rad	tax	ptratio	black	lstat	medv
count	13.000000	13.000000	13.000000	13.000000	13.000000	13.000000	13.000000	13.000000	13.000000	13.000000	13.000000	13.000000	13.000000	13.000000	13.000000
mean	232.307692	0.718795	13.615385	7.078462	0.153846	0.539238	8.348538	71.538462	3.430192	7.461538	325.076923	16.361538	385.210769	4.310000	44.200000
std	60.915768	0.901640	26.298094	5.392767	0.375534	0.092352	0.251261	24.608723	1.883955	5.332532	110.971063	2.410580	10.529359	1.373566	8.092383
min	98.000000	0.020090	0.000000	2.680000	0.000000	0.416100	8.034000	8.400000	1.801000	2.000000	224.000000	13.000000	354.550000	2.470000	21.900000
25%	225.000000	0.331470	0.000000	3.970000	0.000000	0.504000	8.247000	70.400000	2.288500	5.000000	264.000000	14.700000	384.540000	3.320000	41.700000
50%	233.000000	0.520140	0.000000	6.200000	0.000000	0.507000	8.297000	78.300000	2.894400	7.000000	307.000000	17.400000	386.860000	4.140000	48.300000
75%	258.000000	0.578340	20.000000	6.200000	0.000000	0.605000	8.398000	86.500000	3.651900	8.000000	307.000000	17.400000	389.700000	5.120000	50.000000
max	365.000000	3.474280	95.000000	19.580000	1.000000	0.718000	8.780000	93.900000	8.906700	24.000000	666.000000	20.200000	396.900000	7.440000	50.000000

	Unnamed: 0	Private	Apps	Accept	Enroll	Top10perc	Top25perc	F.Undergrad	P.Undergrad	Outstate	Room.Board	Books	Personal	PhD	Terminal	S.F.Ratio	perc.alumni	Expend	Grad.Rate
0	Abilene Christian University	Yes	1660	1232	721	23	52	2885	537	7440	3300	450	2200	70	78	18.1	12	7041	60
1	Adelphi University	Yes	2186	1924	512	16	29	2683	1227	12280	6450	750	1500	29	30	12.2	16	10527	56
2	Adrian College	Yes	1428	1097	336	22	50	1036	99	11250	3750	400	1165	53	66	12.9	30	8735	54
3	Agnes Scott College	Yes	417	349	137	60	89	510	63	12960	5450	450	875	92	97	7.7	37	19016	59
4	Alaska Pacific University	Yes	193	146	55	16	44	249	869	7560	4120	800	1500	76	72	11.9	2	10922	15

	Apps	Accept	Enroll	Top10perc	Top25perc	F.Undergrad	P.Undergrad	Outstate	Room.Board	Books	Personal	PhD	Terminal	S.F.Ratio	perc.alumni	Expend	Grad.Rate
count	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.000000	777.00000
mean	3001.638353	2018.804376	779.972973	27.558559	55.796654	3699.907336	855.298584	10440.669241	4357.526384	549.380952	1340.642214	72.660232	79.702703	14.089704	22.743887	9660.171171	65.46332
std	3870.201484	2451.113971	929.176190	17.640364	19.804778	4850.420531	1522.431887	4023.016484	1096.696416	165.105360	677.071454	16.328155	14.722359	3.958349	12.391801	5221.768440	17.17771
min	81.000000	72.000000	35.000000	1.000000	9.000000	139.000000	1.000000	2340.000000	1780.000000	96.000000	250.000000	8.000000	24.000000	2.500000	0.000000	3186.000000	10.00000
25%	776.000000	604.000000	242.000000	15.000000	41.000000	992.000000	95.000000	7320.000000	3597.000000	470.000000	850.000000	62.000000	71.000000	11.500000	13.000000	6751.000000	53.00000
50%	1558.000000	1110.000000	434.000000	23.000000	54.000000	1707.000000	353.000000	9990.000000	4200.000000	500.000000	1200.000000	75.000000	82.000000	13.600000	21.000000	8377.000000	65.00000
75%	3624.000000	2424.000000	902.000000	35.000000	69.000000	4005.000000	967.000000	12925.000000	5050.000000	600.000000	1700.000000	85.000000	92.000000	16.500000	31.000000	10830.000000	78.00000
max	48094.000000	26330.000000	6392.000000	96.000000	100.000000	31643.000000	21836.000000	21700.000000	8124.000000	2340.000000	6800.000000	103.000000	100.000000	39.800000	64.000000	56233.000000	118.00000

	mpg	cylinders	displacement	horsepower	weight	acceleration	year	origin	name
0	18	8	307	130	3504	12.0	70	1	chevrolet chevelle malibu
1	15	8	350	165	3693	11.5	70	1	buick skylark 320
2	18	8	318	150	3436	11.0	70	1	plymouth satellite
3	16	8	304	150	3433	12.0	70	1	amc rebel sst
4	17	8	302	140	3449	10.5	70	1	ford torino

	mpg	cylinders	displacement	weight	acceleration	year	origin
count	397.000000	397.000000	397.000000	397.000000	397.000000	397.000000	397.000000
mean	23.515869	5.458438	193.532746	2970.261965	15.555668	75.994962	1.574307
std	7.825804	1.701577	104.379583	847.904119	2.749995	3.690005	0.802549
min	9.000000	3.000000	68.000000	1613.000000	8.000000	70.000000	1.000000
25%	17.500000	4.000000	104.000000	2223.000000	13.800000	73.000000	1.000000
50%	23.000000	4.000000	146.000000	2800.000000	15.500000	76.000000	1.000000
75%	29.000000	8.000000	262.000000	3609.000000	17.100000	79.000000	2.000000
max	46.600000	8.000000	455.000000	5140.000000	24.800000	82.000000	3.000000

	mpg	cylinders	displacement	weight	acceleration	year	origin
count	322.000000	322.000000	322.000000	322.000000	322.000000	322.000000	322.000000
mean	24.409317	5.378882	187.680124	2936.807453	15.700621	77.130435	1.596273
std	7.913357	1.657398	100.120925	810.987533	2.706436	3.131849	0.815572
min	11.000000	3.000000	68.000000	1649.000000	8.500000	70.000000	1.000000
25%	18.000000	4.000000	100.250000	2216.000000	14.000000	75.000000	1.000000
50%	23.900000	4.000000	145.500000	2797.500000	15.500000	77.000000	1.000000
75%	30.650000	6.000000	250.000000	3516.000000	17.275000	80.000000	2.000000
max	46.600000	8.000000	455.000000	4997.000000	24.800000	82.000000	3.000000